# -*- coding: utf-8 -*-
# @Author: Tim Liu
# @Date: 2024-05-29
# @Last Modified by: Tim Liu
# @Last Modified time: 2025-01-04

# @Description: document ingestion request schema

from typing import Optional

from pydantic import BaseModel

class IngestRequest(BaseModel):
    """
    Request body describing one document to ingest.

    Only ``url`` is required; every other field has a default.

    url: document url
    kbase_id: knowledge base id (defaults to 0)
    kbase_name: knowledge base name (presumably an alternative to kbase_id;
        confirm against the caller)
    file_type: file type, e.g. html/pdf/word/excel/ppt/website/...
    source_type: from source type, e.g. sharepoint/google drive/upload/...
    title: optional document title
    content: optional document content (presumably inline text supplied
        instead of being fetched from ``url``; confirm against the caller)
    chunk_size: chunk size, defaults to 1000 (unit is not defined in this
        file)
    vector_only: flag, defaults to False (exact semantics are defined by the
        consumer of this schema)
    parser: specify document parser, e.g. gpt4o for image, docintel for
        pdf/image
    download_url: document web url
    config: document's config fields, e.g.
        {
            "year": "xxx",
            "title": "xxx",
            "country": "xxx"
            ......
        }
    """

    url: str

    kbase_id: int = 0
    # Fields that default to None are declared Optional so the annotation
    # matches the default and an explicit null in the request is accepted.
    kbase_name: Optional[str] = None
    file_type: Optional[str] = None
    source_type: Optional[str] = None

    title: Optional[str] = None
    content: Optional[str] = None

    # default chunk size
    chunk_size: int = 1000

    vector_only: bool = False

    # specify document parser, e.g. gpt4o for image, docintel for pdf/image
    parser: Optional[str] = None

    download_url: Optional[str] = None

    config: Optional[dict] = None
    